#!/usr/bin/env perl

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

###############################################################################

	#
	# To get an outline of this test conf file, run the following
	# (substitute this file's name for hcat.conf if it differs):
	#   grep -E '^#|name.*=>' hcat.conf | grep -Ev '^#!|grep' | less
	#
	
  # Has a couple of Hive set directives:
  #   set hive.exec.dynamic.partition.mode=nonstrict;
  #   set hive.exec.dynamic.partition=true;


# Test configuration: a single hash reference assigned to the global $cfg.
#
# Layout:
#   driver => name of the test driver ('Pig')
#   groups => list of test groups, each with a 'name' and a list of 'tests'
#
# NOTE(review): $cfg is intentionally left as an undeclared package variable,
# exactly as before; presumably the harness loads this file and reads $cfg --
# confirm before adding 'my' or 'use strict'.
#
# Per-test keys used below.  Their meaning is defined by the test driver, which
# is not part of this file; the descriptions are inferred from names and usage
# and should be confirmed against the driver:
#   num               - test number within its group
#   hcat_prep         - presumably HCatalog/Hive DDL run before the Pig script
#   pig               - the Pig Latin script under test
#   sql               - presumably the query (or list of queries) producing
#                       the expected rows
#   result_table      - presumably the table (or list of tables) whose contents
#                       are compared with the 'sql' output
#   expected_err_regex- presumably a pattern the failure output must match
#   depends_on        - name of a test ('<group>_<num>') that must run first
#   ignore, notmq, floatpostprocess, delimiter - driver options; see driver
#   :INPATH: / :OUTPATH: - placeholders, presumably substituted by the driver
#
# Quoting: most scripts use q\...\ (backslash as the delimiter), so their text
# cannot contain a backslash.  DDL that needs the two characters \t for Hive's
# "fields terminated by '\t'" therefore uses q?...? or q:...: instead; inside
# q// a backslash before 't' is kept literally.  A q:...: string cannot
# contain a colon.
$cfg = {
    'driver' => 'Pig',
    'groups' => [
        # This first group should be moved to deployer ?
        {
            # Basic load/store round trips through HCatLoader and HCatStorer.
            'name' => 'Pig_Checkin',
            'tests' => [
                {
                    # Copy studenttab10k into a freshly created table.
                    'num' => 1,
                    'hcat_prep' => q\drop table if exists pig_checkin_1;
create table pig_checkin_1 (name string, age int, gpa double) STORED AS TEXTFILE;\,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
store a into 'pig_checkin_1' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'result_table' => 'pig_checkin_1',
                    'sql' => q\select * from studenttab10k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Join two tables that are both loaded through HCatLoader.
                    'num' => 2,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = load 'votertab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
c = join a by name, b by name;
store c into ':OUTPATH:';\,
                    'sql' => [ 'select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);' ],
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Join an HCatLoader table with a file loaded from :INPATH:.
                    'num' => 3,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = load ':INPATH:/votertab10k' as (name:chararray, age:int, registration:chararray, contributions:float);
c = join a by name, b by name;
store c into ':OUTPATH:';\,
                    'sql' => q\select s.name, s.age, gpa, v.name, v.age, registration, contributions from studenttab10k s join votertab10k v on (s.name = v.name);\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Split one input and store each branch into its own table.
                    'num' => 4,
                    'hcat_prep' => q\drop table if exists pig_checkin_4_1;
drop table if exists pig_checkin_4_2;
create table pig_checkin_4_1 (name string, age int, gpa double) STORED AS TEXTFILE;
create table pig_checkin_4_2 (name string, age int, gpa double) STORED AS TEXTFILE;\,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
split a into b if age <=40, c if age > 40;
store b into 'pig_checkin_4_1' using org.apache.hive.hcatalog.pig.HCatStorer();
store c into 'pig_checkin_4_2' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'result_table' => ['pig_checkin_4_1','pig_checkin_4_2'],
                    'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;' ],
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Split one input; one branch goes to a table, the other
                    # to :OUTPATH:.
                    # NOTE(review): the '?' entry in result_table presumably
                    # marks the result that comes from :OUTPATH: rather than a
                    # table -- confirm against the driver.
                    'num' => 5,
                    'hcat_prep' => q\drop table if exists pig_checkin_5;
create table pig_checkin_5 (name string, age int, gpa double) STORED AS TEXTFILE;\,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
split a into b if age <=40, c if age > 40;
store b into 'pig_checkin_5' using org.apache.hive.hcatalog.pig.HCatStorer();
store c into ':OUTPATH:';\,
                    'result_table' => ['pig_checkin_5','?'],
                    'sql' => [ 'select * from studenttab10k where age<=40;', 'select * from studenttab10k where age>40;' ],
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Store into a partitioned table, naming the partition in
                    # the HCatStorer argument ('ds=20110924').
                    'num' => 6,
                    'hcat_prep' => q\drop table if exists pig_checkin_6;
create table pig_checkin_6 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\,
                    'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = filter a by ds == '20110924';
c = foreach b generate name, age;
store c into 'pig_checkin_6' using org.apache.hive.hcatalog.pig.HCatStorer('ds=20110924');\,
                    # debugging leftover: dump a;
                    'result_table' => 'pig_checkin_6',
                    'sql' => "select name, age, ds from studentparttab30k where ds='20110924';",
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Store into a partitioned table with the partition column
                    # (ds) supplied as a data field and no HCatStorer argument.
                    'num' => 7,
                    'hcat_prep' => q\drop table if exists pig_checkin_7;
create table pig_checkin_7 (name string, age int) partitioned by (ds string) STORED AS TEXTFILE;\,
                    'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, ds;
store b into 'pig_checkin_7' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'result_table' => 'pig_checkin_7',
                    'sql' => "select name, age, ds from studentparttab30k;",
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
            ],
        }, # end g
        {
            # Reading existing tables through HCatLoader.
            'name' => 'Pig_Read',
            'tests' => [
                {
                    # Read every column of all100k.
                    'ignore' => 1, # Need to checkin HCATALOG-168.
                    'num' => 1,
                    'pig' => q\a = load 'all100k' using org.apache.hive.hcatalog.pig.HCatLoader();
store a into ':OUTPATH:';\,
                    'sql' => q\select * from all100k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Read three columns of all100kjson.
                    'num' => 2,
                    'pig' => q\a = load 'all100kjson' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate s, i, d;
store b into ':OUTPATH:';\,
                    'sql' => q\select s, i, d from all100kjson;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Read two columns of all100krc.
                    'num' => 3,
                    'pig' => q\a = load 'all100krc' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age;
store b into ':OUTPATH:';\,
                    'sql' => q\select name, age from all100krc;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # A table with one partition in text and one in rc
                    # (q?...? is used so the DDL can contain a literal \t).
                    'num' => 4,
                    'hcat_prep' => q?drop table if exists pig_read_4;
create external table pig_read_4 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile;
alter table pig_read_4 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
alter table pig_read_4 set fileformat rcfile;
alter table pig_read_4 add partition (b='2') location '/user/hcat/tests/data/all100krc';?,
                    'pig' => q\a = load 'pig_read_4' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, b;
store b into ':OUTPATH:';\,
                    'sql' => q\(select name, age, 1 from studenttab10k)
                            union all
                            (select name, age, 2 from all100krc);\,
                },
                {
                    # Read from a table in the non-default database
                    'num' => 5,
                    'hcat_prep' => q?create database if not exists pig_db_1;
drop table if exists pig_db_1.pig_read_5;
create external table pig_db_1.pig_read_5 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as textfile;
use pig_db_1;
alter table pig_read_5 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';?,
                    'pig' => q\a = load 'pig_db_1.pig_read_5' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, b;
store b into ':OUTPATH:';\,
                    'sql' => q\select name, age, 1 from studenttab10k;\,
                },
            ],
        }, # end g
        {
            # Writing tables through HCatStorer in several storage formats.
            'name' => 'Pig_Write',
            'tests' => [
                {
                    # Load a ':'-delimited file and store it as rcfile.
                    'ignore' => 1, # Need to checkin HCATALOG-168.
                    'num' => 1,
                    'hcat_prep' => q\drop table if exists pig_write_1;
create table pig_write_1(t tinyint,si smallint,i int,b bigint,bool boolean,f float,d double,s string) stored as rcfile;\,
                    'pig' => q\a = load ':INPATH:/all100k' using PigStorage(':') as (t:int,si:int,i:int,b:int,bo:boolean,f:float,d:double,s:chararray);
store a into 'pig_write_1' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'result_table' => 'pig_write_1',
                    'sql' => q\select * from all100k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Store three columns into a table that uses the JSON serde.
                    'num' => 2,
                    'hcat_prep' => q\drop table if exists pig_write_2;
create table pig_write_2(
            s string,
            i int,
            d double,
            m map<string, string>,
            bb array<struct<a: int, b: string>>)
            row format serde 'org.apache.hive.hcatalog.data.JsonSerDe'
            STORED AS TEXTFILE;
\,
                    'pig' => q\a = load 'all100kjson' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate s, i, d;
store b into 'pig_write_2' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'sql' => q\select IFNULL(s, ""), IFNULL(i, ""), IFNULL(d, "") from all100kjson;\,
                    'result_table' => 'pig_write_2',
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Store two columns into an rcfile table.
                    'num' => 3,
                    'hcat_prep' => q\drop table if exists pig_write_3;
create table pig_write_3(
            name string,
            age int,
            gpa double)
stored as rcfile;
\,
                    'pig' => q\a = load 'all100krc' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age;
store b into 'pig_write_3' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'sql' => q\select name, age from all100krc;\,
                    'result_table' => 'pig_write_3',
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Store in a sequence file
                    'num' => 4,
                    'hcat_prep' => q\drop table if exists pig_write_4;
create table pig_write_4(
            name string,
            age int,
            gpa double)
stored as sequencefile;
\,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age, 0.1;
c = foreach b generate name, age, $2 as gpa;
store c into 'pig_write_4' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'sql' => q\select name, age, 0.1 from studenttab10k;\,
                    'result_table' => 'pig_write_4',
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # Write to a table in the non-default database.
                    # The table is dropped and recreated (like every other
                    # prep in this file) so that a rerun starts from an empty
                    # table instead of one still holding the previous run's
                    # rows.
                    'num' => 5,
                    'hcat_prep' => q?create database if not exists pig_db_1;
drop table if exists pig_db_1.pig_write_5;
create table pig_db_1.pig_write_5 (name string, age int) row format delimited fields terminated by '\t' stored as textfile;?,
                    'pig' => q\a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name, age;
store b into 'pig_db_1.pig_write_5' using org.apache.hive.hcatalog.pig.HCatStorer();\,
                    'sql' => q\select name, age from studenttab10k;\,
                    'result_table' => 'pig_db_1.pig_write_5',
                },
            ],
        }, # end g
        {
            # Writing and reading a partitioned table whose schema gains a
            # column between partitions.  The three tests form a chain.
            'name' => 'Pig_Change_Schema',
            'tests' => [
                {
                    # I don't like this, I'm using one test to setup for the next.  But I don't know how else to do this.
                    # Step 1: create the table with one column and write the
                    # ds=20110924 partition.
                    'num' => 1,
                    'hcat_prep' => q\drop table if exists pig_change_schema_1;
create table pig_change_schema_1 (name string) partitioned by (ds string) STORED AS TEXTFILE;\,
                    'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = filter a by ds == '20110924';
c = foreach b generate name;
store c into 'pig_change_schema_1' using org.apache.hive.hcatalog.pig.HCatStorer('ds=20110924');\,
                    'result_table' => 'pig_change_schema_1',
                    'sql' => q\select name, ds from studentparttab30k where ds='20110924';\,
                },
                {
                    # I don't like this, I'm using one test to setup for the next.  But I don't know how else to do this.
                    # Step 2: add the age column, then write the ds=20110925
                    # partition with both columns.
                    'num' => 2,
                    'depends_on' => 'Pig_Change_Schema_1',
                    'hcat_prep' => q\alter table pig_change_schema_1 add columns (age int);\,
                    'pig' => q\a = load 'studentparttab30k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = filter a by ds == '20110925';
c = foreach b generate name, age;
store c into 'pig_change_schema_1' using org.apache.hive.hcatalog.pig.HCatStorer('ds=20110925');\,
                    'result_table' => 'pig_change_schema_1',
                    'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924')
                                            union all
				                            (select name, age, ds from studentparttab30k where ds = '20110925');\,
                },
                {
                    # I don't like this, I'm using one test to setup for the next.  But I don't know how else to do this.
                    # Step 3: read both partitions back through HCatLoader.
                    'num' => 3,
                    'depends_on' => 'Pig_Change_Schema_2',
                    'pig' => q\a = load 'pig_change_schema_1' using org.apache.hive.hcatalog.pig.HCatLoader();
c = foreach a generate name, age, ds;
store c into ':OUTPATH:';\,
                    'sql' => q\(select name, '', ds from studentparttab30k where ds='20110924')
                                            union all
				                            (select name, age, ds from studentparttab30k where ds = '20110925');\,
                },
            ],
        },
        {
            # Stores into bucketed tables; both tests expect an error whose
            # text matches 'not supported'.
            'name' => 'Pig_HCat_Barrier',
            'tests' => [
                {
                    # Target table is CLUSTERED BY (name).
                    'num' => 1,
                    'hcat_prep' => q\drop table if exists pig_hcat_barrier_1;
create table pig_hcat_barrier_1 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\,
                    'pig' => q\
a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double);
store a into 'pig_hcat_barrier_1' using org.apache.hive.hcatalog.pig.HCatStorer('b=1'); \,
                    'expected_err_regex' => 'not supported',
                },
                {
                    # Target table is CLUSTERED BY (name) SORTED BY (name).
                    'num' => 2,
                    'hcat_prep' => q\drop table if exists pig_hcat_barrier_2;
create table pig_hcat_barrier_2 (name string, age int, gpa double) partitioned by (b string) CLUSTERED BY (name) SORTED BY (name) INTO 1 BUCKETS STORED AS TEXTFILE;\,
                    'pig' => q\
a = load ':INPATH:/studenttab10k' as (name:chararray, age:int, gpa:double);
store a into 'pig_hcat_barrier_2' using org.apache.hive.hcatalog.pig.HCatStorer('b=1'); \,
                    'expected_err_regex' => 'not supported',
                },
            ],
        }, # end g
        {
            # Reading tables after a series of ALTER TABLE statements.
            # The preps use q:...: so the DDL can contain a literal \t.
            'name' => 'Pig_HCAT_COOP',
            'tests' => [
                {
                    # test if Pig can load the table after various table schema change
                    'num' => 1,
                    'hcat_prep' => q:drop table if exists pig_hcat_coop_1;
create external table pig_hcat_coop_1 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as TEXTFILE;
alter table pig_hcat_coop_1 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
alter table pig_hcat_coop_1 partition(b='1') set fileformat TEXTFILE;
alter table pig_hcat_coop_1 change gpa registration string;
alter table pig_hcat_coop_1 add columns (contributions float);
alter table pig_hcat_coop_1 add partition (b='2') location '/user/hcat/tests/data/votertab10k';
alter table pig_hcat_coop_1 partition(b='2') set fileformat TEXTFILE;
alter table pig_hcat_coop_1 replace columns (name string, age int);
:,
                    'pig' => q\
a = load 'pig_hcat_coop_1' using org.apache.hive.hcatalog.pig.HCatLoader();
store a into ':OUTPATH:';\,
                    'sql' => q\select name, age, '1' from studenttab10k union all select name, age, '2' from votertab10k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # test if Pig can load table after fileformat change and table schema change
                    'num' => 2,
                    'hcat_prep' => q:drop table if exists pig_hcat_coop_2;
create external table pig_hcat_coop_2 (name string, age int, gpa double) partitioned by (b string) row format delimited fields terminated by '\t' stored as TEXTFILE;
alter table pig_hcat_coop_2 add partition (b='1') location '/user/hcat/tests/data/studenttab10k';
alter table pig_hcat_coop_2 partition(b='1') set fileformat TEXTFILE;
alter table pig_hcat_coop_2 add partition (b='2') location '/user/hcat/tests/data/all100krc';
alter table pig_hcat_coop_2 partition(b='2') set fileformat RCFILE;
alter table pig_hcat_coop_2 replace columns (age int, name string);
:,
                    'pig' => q\
a = load 'pig_hcat_coop_2' using org.apache.hive.hcatalog.pig.HCatLoader();
store a into ':OUTPATH:';\,
                    'sql' => q\select age, name, '1' from studenttab10k union all select age, name, '2' from all100krc;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
            ],
        },
        {
            # Reading and writing struct, map and array columns.
            'name' => 'Pig_Complex',
            'tests' => [
                {
                    # test reading tuples from the complex table
                    'num' => 1,
                    'pig' => q\
a = load 'studentcomplextab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate flatten(s); 
store b into ':OUTPATH:';\,
                    'sql' => q\select IFNULL(name, ""), IFNULL(age, ""), IFNULL(gpa, "") from studentcomplextab10k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # test reading maps from the complex table
                    'num' => 2,
                    'pig' => q\
a = load 'studentcomplextab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate s.name as n1, m#'name' as n2;
c = filter b by n1 != '' and n2 is not null;
store c into ':OUTPATH:';\,
                    'sql' => q\select t.name, m.mvalue
                                              from studentcomplextab10k t, studentcomplextab10k_map m
                                              where t.id = m.tid and t.name is not null and m.mkey = 'name';\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # test reading arrays from the complex table
                    'num' => 3,
                    'pig' => q\
a = load 'studentcomplextab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate s.name as n1, flatten(a);
c = filter b by n1 != '' ;
store c into ':OUTPATH:';\,
                    'sql' => q\select t.name, m.lvalue
                                              from studentcomplextab10k t, studentcomplextab10k_list m
                                              where t.id = m.tid and t.name is not null;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # test writing tuples to a complex table.  This also tests reading with default separators.
                    'num' => 4,
                    'notmq' => 1,
                    'hcat_prep' => q\drop table if exists pig_complex_4;
create table pig_complex_4 (s struct<name: string, age: int, gpa: double>) STORED AS TEXTFILE;\,
                    'pig' => q\
a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate TOTUPLE(name, age, gpa) as s;
store b into 'pig_complex_4' using org.apache.hive.hcatalog.pig.HCatStorer();
exec;
c = load 'pig_complex_4' using org.apache.hive.hcatalog.pig.HCatLoader();
d = foreach c generate flatten(s);
store d into ':OUTPATH:';\,
                    'sql' => q\select name, age, gpa from studenttab10k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # test writing maps to a complex table.  This also tests reading with default separators.
                    'num' => 5,
                    'notmq' => 1,
                    'hcat_prep' => q\drop table if exists pig_complex_5;
create table pig_complex_5 (m map<string, string>) STORED AS TEXTFILE;\,
                    'pig' => q\
a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate TOMAP('name', name, 'age', (chararray)age, 'gpa', (chararray)gpa) as m;
store b into 'pig_complex_5' using org.apache.hive.hcatalog.pig.HCatStorer();
exec;
c = load 'pig_complex_5' using org.apache.hive.hcatalog.pig.HCatLoader();
d = foreach c generate m#'name', m#'age', m#'gpa';
store d into ':OUTPATH:';\,
                    'sql' => q\select name, age, gpa from studenttab10k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
                {
                    # test writing bags to a complex table.  This also tests reading with default separators.
                    'num' => 6,
                    'notmq' => 1,
                    'hcat_prep' => q\drop table if exists pig_complex_6;
create table pig_complex_6 (a array<string>) STORED AS TEXTFILE;\,
                    'pig' => q\
a = load 'studenttab10k' using org.apache.hive.hcatalog.pig.HCatLoader();
b = foreach a generate name;
c = distinct b;
d = group c all;
e = foreach d generate $1 as a;
store e into 'pig_complex_6' using org.apache.hive.hcatalog.pig.HCatStorer();
exec;
f = load 'pig_complex_6' using org.apache.hive.hcatalog.pig.HCatLoader();
g = foreach f generate flatten(a);
store g into ':OUTPATH:';\,
                    'sql' => q\select distinct name from studenttab10k;\,
                    'floatpostprocess' => 1,
                    'delimiter' => "\t",
                },
            ],
        }, # end g
    ],
}
